0.1 Background

0.2 Handel Polyclonal infections

Create two objects containing the list of samples that are monoclonals and polyclonals

monoclonals = Pfal_rGenome_object@data$metadata$Sample_id

polyclonals = Pfal_rGenome_object@data$metadata$Sample_id

Calculate the expected heterozygosity assuming that all samples are diploids

Pfal_rGenome_object@data$loci_table$ExpHet_all_as_diploid = 
  get_ExpHet(obj = Pfal_rGenome_object, update_AC = T, monoclonals = NULL, polyclonals = polyclonals)

Calculate the expected heterozygosity considering that all samples are monoclonals, it means that secondary alleles won’t be included

Pfal_rGenome_object@data$loci_table$ExpHet_all_as_haploid = 
  get_ExpHet(obj = Pfal_rGenome_object, update_AC = T, monoclonals = monoclonals)

Calculate the expected heterozygosity defining the ploidy of each position individually

Pfal_rGenome_object@data$loci_table$ExpHet_default = get_ExpHet(obj = Pfal_rGenome_object)

Create a plot of ExpHet using the different approaches

Pfal_rGenome_object@data$loci_table %>%
  pivot_longer(cols = c('ExpHet_all_as_haploid', 'ExpHet_default'), names_to = 'Treatment', values_to = 'ExpHet')%>%
  ggplot(aes(x = ExpHet_all_as_diploid, y = ExpHet))+
  geom_point(alpha = .1)+
  facet_grid(.~Treatment)+
  theme_bw()

Create a plot of the distribution of ExpHet by by type of marker

Pfal_rGenome_object@data$loci_table %>%
  pivot_longer(cols = c('ExpHet_all_as_diploid','ExpHet_all_as_haploid', 'ExpHet_default'), names_to = 'Treatment', values_to = 'ExpHet')%>%
  ggplot(aes(fill = Treatment, x = ExpHet))+
  geom_histogram(alpha = .4)+
  facet_grid(TypeOf_Markers~Treatment, scales = 'free_y')+
  theme_bw()

Calculate Heterozygosity by each population (Subnational_level0: Regions within Colombia)

Pfal_rGenome_object@data$metadata[is.na(Pfal_rGenome_object@data$metadata$Country),][['Country']] = 'Perú'

ExpHet_by_Pop= 
  get_ExpHet(obj = Pfal_rGenome_object, update_AC = T, monoclonals = NULL, polyclonals = polyclonals, by = 'Country')
ExpHet_by_Pop %>%
  pivot_longer(cols = c('Perú', 'Venezuela'), names_to = 'Region', values_to = 'ExpHet')%>%
  ggplot(aes(fill = Region, x = ExpHet))+
  geom_histogram(alpha = .4)+
  facet_grid(TypeOf_Markers~Region, scales = 'free_y')+
  theme_bw()

0.3 Nucleotide diversity

Calculate Nuc. Div. by gene by Population (Subnational_level0)

pi_by_gene_by_country = get_nuc_div(Pfal_rGenome_object, polyclonals = polyclonals, gff = '../reference/PlasmoDB-59_Pfalciparum3D7.gff', type_of_region = 'gene', window = NULL, by = 'Country')
mhp_pi_by_gene_by_country = pi_by_gene_by_country %>%
  pivot_longer(cols = c('Perú_pi', 'Venezuela_pi', 'Total_pi'), names_to = 'Regions', values_to = 'pi')%>%
  mutate(CHROM2  = gsub('(^Pf3D7_|_v3)', '',seqid))%>%
  ggplot(aes(x = start, y = pi, color = Regions)) +
    geom_point(alpha = 0.5, size = .25) +
    scale_color_manual(values = c('dodgerblue3', 'firebrick3', 'gray40'))+
    facet_grid(Regions ~ CHROM2, scales = 'free_x')+
    theme_minimal()+
    labs(y = 'Nuc. Div.', x = 'Chromosomal position', color = 'Region')+
    theme(legend.position = 'right',
          axis.text.x = element_blank(),
          axis.title.x = element_blank())

mhp_pi_by_gene_by_country

mhp_pi_by_gene_by_country = ggplotly(mhp_pi_by_gene_by_country)

chromosomes = c('Pf3D7_01_v3', 'Pf3D7_02_v3', 'Pf3D7_03_v3', 'Pf3D7_04_v3',
                'Pf3D7_05_v3', 'Pf3D7_06_v3', 'Pf3D7_07_v3', 'Pf3D7_08_v3',
                'Pf3D7_09_v3', 'Pf3D7_10_v3', 'Pf3D7_11_v3', 'Pf3D7_12_v3',
                'Pf3D7_13_v3', 'Pf3D7_14_v3', 'Pf3D7_API_v3', 'Pf3D7_MIT_v3')

for(region in 1:3){
  for(chrom in 1:16){
    mhp_pi_by_gene_by_country$x$data[[chrom + (region - 1)*16]]$text = paste(mhp_pi_by_gene_by_country$x$data[[chrom + (region - 1)*16]]$text,
      pi_by_gene_by_country[pi_by_gene_by_country$seqid == chromosomes[chrom],]$gene_description, sep = '<br />')
  }
}


mhp_pi_by_gene_by_country

0.4 Genetic differentiation

pca_PerVen = fastGRM(obj = Pfal_rGenome_object, k = 2, polyclonals = polyclonals, Pop = 'Country')

pca_PerVen  %>% ggplot(aes(x = PC1, y = PC2, color = Country))+
  geom_point(alpha = .7, size = 2) +
  stat_ellipse(level = .6)+
  scale_color_manual(values = c('dodgerblue3', 'firebrick3'))+
  theme_bw()+
  labs(title = 'WGS - PerVen',
       color = 'Regions')

Genetic Differentiation within Peru

Pfal_rGenome_object_Peru = filter_samples(Pfal_rGenome_object, v = 
                                          Pfal_rGenome_object@data$metadata$Country == 'Perú')

polyclonals_per = Pfal_rGenome_object_Peru@data$metadata$Sample_id

pca_Per = fastGRM(obj = Pfal_rGenome_object_Peru, k = 2, polyclonals = polyclonals_per, Pop = 'Subnational_level0')

pca_Per  %>% ggplot(aes(x = PC1, y = PC2, color = Subnational_level0))+
  geom_point(alpha = .7, size = 2) +
  stat_ellipse(level = .6)+
  theme_bw()+
  labs(title = 'WGS - Per',
       color = 'Regions')

Genetic Differentiation within Peru

Pfal_rGenome_object_Ven = filter_samples(Pfal_rGenome_object, v = 
                                          Pfal_rGenome_object@data$metadata$Country == 'Venezuela')


polyclonals_ven = Pfal_rGenome_object_Ven@data$metadata$Sample_id

pca_Ven = fastGRM(obj = Pfal_rGenome_object_Ven, k = 2, polyclonals = polyclonals_ven, Pop = 'Subnational_level2')


pca_Ven  %>% ggplot(aes(x = PC1, y = PC2, color = Subnational_level2))+
  geom_point(alpha = .7, size = 2) +
  stat_ellipse(level = .6)+
  theme_bw()+
  labs(title = 'WGS - Ven',
       color = 'Regions')